1 Load Packages

library(haven)
## Warning: package 'haven' was built under R version 4.2.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(effsize)
library(rstatix)
## Warning: package 'rstatix' was built under R version 4.2.3
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:stats':
## 
##     filter
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.2.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ✔ readr     2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ rstatix::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag()      masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rafalib)
library(ggsci)
## Warning: package 'ggsci' was built under R version 4.2.3
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(knitr)
## Warning: package 'knitr' was built under R version 4.2.3
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(formattable)
## 
## Attaching package: 'formattable'
## 
## The following object is masked from 'package:plotly':
## 
##     style
library(data.table)
## Warning: package 'data.table' was built under R version 4.2.3
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(pwrss)
## Warning: package 'pwrss' was built under R version 4.2.3
## 
## Attaching package: 'pwrss'
## 
## The following object is masked from 'package:stats':
## 
##     power.t.test
library(ShortForm)
## Package 'ShortForm' version 0.5.2
library(psych)
## Warning: package 'psych' was built under R version 4.2.3
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 
## The following object is masked from 'package:effsize':
## 
##     cohen.d
library(data.table)
library(ggridges)
## Warning: package 'ggridges' was built under R version 4.2.3
library(dotwhisker)
## Warning: package 'dotwhisker' was built under R version 4.2.3
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(lm.beta)
## Warning: package 'lm.beta' was built under R version 4.2.3
library(tibble)
library(jtools)
## Warning: package 'jtools' was built under R version 4.2.3
library(scales)
## 
## Attaching package: 'scales'
## 
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
## 
## The following objects are masked from 'package:formattable':
## 
##     comma, percent, scientific
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(lme4)
## Warning: package 'lme4' was built under R version 4.2.3
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 4.2.3
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(ggstatsplot)
## Warning: package 'ggstatsplot' was built under R version 4.2.3
## You can cite this package as:
##      Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
##      Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(correlation)
## 
## Attaching package: 'correlation'
## 
## The following object is masked from 'package:rstatix':
## 
##     cor_test
library(see)
## Warning: package 'see' was built under R version 4.2.3
## 
## Attaching package: 'see'
## 
## The following objects are masked from 'package:ggsci':
## 
##     scale_color_material, scale_colour_material, scale_fill_material
library(tidygraph)
## Warning: package 'tidygraph' was built under R version 4.2.3
## 
## Attaching package: 'tidygraph'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.3
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.2.3
## #refugeeswelcome
library(lavaan)
## Warning: package 'lavaan' was built under R version 4.2.3
## This is lavaan 0.6-15
## lavaan is FREE software! Please report any bugs.
## 
## Attaching package: 'lavaan'
## 
## The following object is masked from 'package:psych':
## 
##     cor2cov
library(semPlot)

2 Portfolio 1: Box

#This study used a mixed design.
#We measured participants' positive attitudes toward Asians and Whites before and after the condition.
#Participants are Asian Americans. 
#The condition is whether being rejected by Asian or White peers. 
#I excluded people who do not identify as Asian Americans.
#Here, I only examine the within condition attitude change.
#That is, for those who experienced Asian prejudice condition, do their positive attitudes decrease toward Asians?
#And, for those who experienced White prejudice condition, do their positive attitudes decrease toward Whites? 

# Load the SPSS survey data and keep Asian-identified participants assigned to
# the ingroup-prejudice (rejected-by-Asians) condition.
Adata <- read_sav("C:/Users/Colin/Documents/GitHub/Colin_portfolio/p01/data/AQ.sav")
Adf <- Adata %>% 
  filter(Asian == "1" & Conditions == "Ingroup")

# Paired t-test: baseline warmth toward Asians (Q95) vs. post-manipulation
# warmth (Asian_Warmth) within the same participants.
t.test(Adf$Q95, Adf$Asian_Warmth, paired = TRUE, alternative = "two.sided") 
## 
##  Paired t-test
## 
## data:  Adf$Q95 and Adf$Asian_Warmth
## t = 2.2494, df = 91, p-value = 0.0269
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.02669492 0.42982682
## sample estimates:
## mean difference 
##       0.2282609
# Asian-identified participants assigned to the outgroup-prejudice
# (rejected-by-Whites) condition.
Wdf <- Adata %>%
  filter(Asian == "1" & Conditions == "Outgroup")

# Paired t-test: baseline warmth toward Whites (Q96) vs. post-manipulation
# warmth (White_Warmth).
t.test(Wdf$Q96, Wdf$White_Warmth, paired = TRUE, alternative = "two.sided")
## 
##  Paired t-test
## 
## data:  Wdf$Q96 and Wdf$White_Warmth
## t = -0.4356, df = 88, p-value = 0.6642
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.1874901  0.1200743
## sample estimates:
## mean difference 
##     -0.03370787

2.1 P1 Boxplot 1

# Paired boxplot: pre vs. post warmth toward Asians for the ingroup-prejudice
# condition. Fixes from review: the point-geom stat_summary() was passed an
# invalid `width` argument (ggplot2 warned and ignored it — points have no
# width), and the partially matched `limit` argument is spelled out as `limits`.
Before <- Adf$Q95           # baseline warmth toward Asians
After  <- Adf$Asian_Warmth  # post-manipulation warmth toward Asians
Adf2 <- data.frame(Before = Before, After = After)

ggpaired(
  Adf2, cond1 = "Before", cond2 = "After",
  fill = "condition", line.color = "gray", line.size = 0.3,
  palette = "jco", xlab = "Condition", ylab = "Asian Warmth",
  caption = "t(91) = 2.25, 95% CI [0.03 0.43], Cohen's d = 0.24\n"
) +
  ggtitle("Before and After Positive Attitudes toward Asians") +
  scale_y_continuous(limits = c(0, 10)) +
  stat_compare_means(vjust = 0.5, method = "t.test", paired = TRUE) +
  # white dot marks the mean within each box
  stat_summary(fun = "mean", geom = "point", colour = "white")
## Warning in stat_summary(fun = "mean", geom = "point", width = 0.5, colour =
## "white"): Ignoring unknown parameters: `width`

#conclusion: Asians who experienced ingroup prejudice felt significantly less warm toward Asians, compared to their baseline attitudes; the mean difference is only .22, so it's not very visible in the boxplot

2.2 P1 Boxplot 2

# Paired boxplot: pre vs. post warmth toward Whites for the outgroup-prejudice
# condition. Fixes from review: dropped the invalid `width` argument on the
# point-geom stat_summary() (warned and ignored) and spelled `limits` in full.
Before <- Wdf$Q96           # baseline warmth toward Whites
After  <- Wdf$White_Warmth  # post-manipulation warmth toward Whites
Wdf2 <- data.frame(Before = Before, After = After)
Wdf2 <- Wdf2[complete.cases(Wdf2), ]  # ggpaired requires complete pairs

ggpaired(
  Wdf2, cond1 = "Before", cond2 = "After",
  fill = "condition", line.color = "gray", line.size = 0.3,
  palette = "jco", xlab = "Condition", ylab = "White Warmth",
  caption = "t(88) = -0.44, 95% CI [-0.19, 0.12], Cohen's d = -0.05\n"
) +
  ggtitle("Before and After Positive Attitudes toward Whites\n") +
  scale_y_continuous(limits = c(0, 10)) +
  stat_compare_means(vjust = 0.5, method = "t.test", paired = TRUE, na.rm = TRUE) +
  # white dot marks the mean within each box
  stat_summary(fun = "mean", geom = "point", colour = "white")
## Warning in stat_summary(fun = "mean", geom = "point", width = 0.5, colour =
## "white"): Ignoring unknown parameters: `width`

#conclusion: Asians who experienced outgroup prejudice felt the same toward Whites, compared to their baseline attitudes

2.3 P1 Base R Boxplot 1

# Base-R version of boxplot 1: boxes plus jittered raw points.
mypar(1,1)
dat <- list(Before=Adf$Q95, After=Adf$Asian_Warmth)
# Draw the boxplots; cex = 0 hides the default outlier symbols so the
# stripchart below can show every observation instead.
dat %>%
   boxplot(xlab = "Condition",
           ylab = "Asian Warmth",
           cex = 0)
# Overlay individual observations with horizontal jitter.
 dat %>%
   stripchart(
     vertical = TRUE,
     method = "jitter",
     pch = 16,
     add = TRUE,
     col = "#02a4d3"
   ) 
# Annotate the test statistics above the plot.
mtext(text="t(91) = 2.25, p = .027, 95% CI [0.03 0.43], Cohen's d = 0.24", side = 3, adj = 1, col = "black", cex = 0.8, font = 9)

2.4 P1 Base R Boxplot 1

library(tidyverse)
library(rafalib)

# Reset plotting parameters to a single panel.
mypar(1,1)
# NOTE(review): the original called `list(Wdf)` here, which only echoed the
# entire 90 x 42 tibble to the console; that accidental print was removed.
# Base-R version of boxplot 2 (White warmth), same box + stripchart recipe;
# cex = 0 hides outlier symbols because all points are drawn below.
Wdf2 %>%
   boxplot(xlab = "Condition",
           ylab = "White Warmth",
           cex = 0)
# Overlay individual observations with horizontal jitter.
 Wdf2 %>%
   stripchart(
     vertical = TRUE,
     method = "jitter",
     pch = 16,
     add = TRUE,
     col = "#02a4d3"
   )
# Annotate the test statistics above the plot.
mtext(text= "t(88) = -0.44, p = .664, 95% CI [-0.19, 0.12], Cohen's d = -0.05", side = 3, adj = 1, col = "black", cex = 0.8, font = 9)

3 Portfolio 2: Violin and Bar

library(haven)
library(dplyr)
require(ggiraph)
## Loading required package: ggiraph
## Warning: package 'ggiraph' was built under R version 4.2.3
require(ggiraphExtra)
## Loading required package: ggiraphExtra
## 
## Attaching package: 'ggiraphExtra'
## The following object is masked from 'package:see':
## 
##     coord_radar
require(plyr)
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:tidygraph':
## 
##     arrange, mutate, rename
## The following objects are masked from 'package:plotly':
## 
##     arrange, mutate, rename, summarise
## The following object is masked from 'package:purrr':
## 
##     compact
## The following object is masked from 'package:ggpubr':
## 
##     mutate
## The following objects are masked from 'package:rstatix':
## 
##     desc, mutate
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(ggsci)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(effsize)
library(ggpubr)
# Reload the data for the between-condition analyses; keep all
# Asian-identified participants (both conditions).
Adata <- read_sav("C:/Users/Colin/Documents/GitHub/Colin_portfolio/p02/data/AQ.sav")
Bd <- Adata %>% 
  filter(Asian == "1")

# Welch two-sample t-test: warmth toward Asians by condition.
t.test(Bd$Asian_Warmth~Bd$Conditions)
## 
##  Welch Two Sample t-test
## 
## data:  Bd$Asian_Warmth by Bd$Conditions
## t = -2.6589, df = 179.97, p-value = 0.008547
## alternative hypothesis: true difference in means between group Ingroup and group Outgroup is not equal to 0
## 95 percent confidence interval:
##  -0.9720587 -0.1438833
## sample estimates:
##  mean in group Ingroup mean in group Outgroup 
##               7.108696               7.666667
# Welch two-sample t-test: warmth toward Whites by condition.
t.test(Bd$White_Warmth~Bd$Conditions)
## 
##  Welch Two Sample t-test
## 
## data:  Bd$White_Warmth by Bd$Conditions
## t = -0.31955, df = 169.49, p-value = 0.7497
## alternative hypothesis: true difference in means between group Ingroup and group Outgroup is not equal to 0
## 95 percent confidence interval:
##  -0.6162466  0.4445319
## sample estimates:
##  mean in group Ingroup mean in group Outgroup 
##               5.880435               5.966292
#This study used a mixed design.
#We measured participants' positive attitudes toward Asians and Whites before and after the condition.
#Participants are Asian Americans. 
#The condition is whether being rejected by Asian or White peers. 
#I excluded people who do not identify as Asian Americans.
#Here, I only examine the between condition attitude difference.
#That is, compared to being rejected by Whites, do participants hold less positive attitudes toward Asians when they are rejected by Asians?
#And, compared to being rejected by Asians, do participants hold less positive attitudes toward Whites when they are rejected by Whites?

3.1 Ingroup vs. outgroup prejudice on Asian Warmth

Asians who are rejected by Asians feel less warm toward Asians, compared to being rejected by Whites

# Violin plot of Asian warmth by condition, with a mean point and 95% CI
# error bars per group. Fixes from review: dropped the invalid `width`
# argument on the point-geom stat_summary() (warned and ignored) and spelled
# the partially matched `limit` argument out as `limits`.
ggplot(data = Bd,
       mapping = aes(x = Conditions,
                     y = Asian_Warmth, fill = Conditions)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "point", colour = "black") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .4) +
  scale_fill_jco() +
  theme(panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey")) +
  scale_y_continuous(limits = c(0, 10)) +
  stat_compare_means(method = "t.test", na.rm = TRUE) +
  xlab("Condition") +
  ylab("Asian Warmth") +
  labs(caption = "t(180) = -2.66, 95% CI [-0.97, -0.14], Cohen's d = -0.39") +
  ggtitle("Ingroup Prejudice Negatively Affects Ingroup Attitude")
## Warning in stat_summary(fun = "mean", geom = "point", width = 0.5, colour =
## "black"): Ignoring unknown parameters: `width`

3.1.1 for people who prefer bar graphs

# Bar-graph version: bar height = group mean. Fixes from review: the bar
# summary used the retired `fun.y` argument, which ggplot2 ignored so the bars
# silently fell back to mean_se(); it is now `fun = "mean"`. Also removed the
# invalid `width` on the point-geom stat_summary() and the duplicated y scale
# (the second scale_y_continuous() replaced the first with a warning).
ggplot(Bd, aes(x = Conditions, y = Asian_Warmth)) +
  geom_bar(aes(fill = Conditions), stat = "summary", fun = "mean") +
  stat_summary(fun = "mean", geom = "point", colour = "black") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .4) +
  scale_fill_jco() +
  theme(panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey")) +
  scale_y_continuous(limits = c(0, 10)) +
  stat_compare_means(method = "t.test", na.rm = TRUE) +
  xlab("Condition") +
  ylab("Asian Warmth") +
  labs(caption = "t(180) = -2.66, 95% CI [-0.97, -0.14], Cohen's d = -0.39") +
  ggtitle("Ingroup Prejudice Negatively Affects Ingroup Attitude")
## Warning in geom_bar(aes(fill = Conditions), stat = "summary", fun.y = "mean"):
## Ignoring unknown parameters: `fun.y`
## Warning in stat_summary(fun = "mean", geom = "point", width = 0.5, colour =
## "black"): Ignoring unknown parameters: `width`
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## No summary function supplied, defaulting to `mean_se()`

3.2 Ingroup vs. outgroup prejudice on White Warmth

Asians who are rejected by Whites feel the same toward Whites, compared to being rejected by Asians

# Violin plot of White warmth by condition, with a mean point and 95% CI
# error bars per group. Fixes from review: dropped the invalid `width`
# argument on the point-geom stat_summary() and spelled `limits` in full.
ggplot(data = Bd,
       mapping = aes(x = Conditions,
                     y = White_Warmth, fill = Conditions)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "point", colour = "black") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .4) +
  scale_fill_jco() +
  theme(panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey")) +
  scale_y_continuous(limits = c(0, 10)) +
  stat_compare_means(method = "t.test", na.rm = TRUE) +
  xlab("Condition") +
  ylab("White Warmth") +
  labs(caption = "t(179) = -.32, 95% CI [-0.61, 0.44], Cohen's d = -0.05") +
  ggtitle("Prejudice Type Had No Effect on Outgroup Attitude")
## Warning in stat_summary(fun = "mean", geom = "point", width = 0.5, colour =
## "black"): Ignoring unknown parameters: `width`
## Warning: Removed 1 rows containing non-finite values (`stat_ydensity()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Removed 1 rows containing non-finite values (`stat_summary()`).

3.2.1 for people who prefer bar graphs

# Bar-graph version for White warmth. Fixes from review: retired `fun.y`
# (ignored; bars silently fell back to mean_se()) replaced with `fun = "mean"`;
# removed the invalid `width` on the point-geom stat_summary() and the
# duplicated y scale.
ggplot(Bd, aes(x = Conditions, y = White_Warmth)) +
  geom_bar(aes(fill = Conditions), stat = "summary", fun = "mean") +
  stat_summary(fun = "mean", geom = "point", colour = "black") +
  stat_summary(fun.data = "mean_cl_normal", geom = "errorbar", width = .4) +
  scale_fill_jco() +
  theme(panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey")) +
  scale_y_continuous(limits = c(0, 10)) +
  stat_compare_means(method = "t.test", na.rm = TRUE) +
  xlab("Condition") +
  ylab("White Warmth") +
  labs(caption = "t(179) = -.32, 95% CI [-0.61, 0.44], Cohen's d = -0.05") +
  ggtitle("Prejudice Type Had No Effect on Outgroup Attitude")
## Warning in geom_bar(aes(fill = Conditions), stat = "summary", fun.y = "mean"):
## Ignoring unknown parameters: `fun.y`
## Warning in stat_summary(fun = "mean", geom = "point", width = 0.5, colour =
## "black"): Ignoring unknown parameters: `width`
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## No summary function supplied, defaulting to `mean_se()`
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 1 rows containing non-finite values (`stat_summary()`).

3.3 Prejudice type * centrality interaction

3.3.1 Prejudice type * centrality interaction on Asian warmth

The impact of prejudice type (ingroup vs. outgroup) on positive attitude toward Asians does not differ by centrality (i.e., how much Asians value their group membership)

# Scatter + per-condition regression lines: does centrality moderate the
# condition effect on Asian warmth? Fixes from review: `limit` spelled out as
# `limits` (it only worked via partial argument matching) and `lm` quoted as
# geom_smooth() expects a method name.
ggplot(data = Bd,
       mapping = aes(x = Centrality,
                     y = Asian_Warmth, color = Conditions)) +
  xlab("Centrality") +
  ylab("Asian Warmth") +
  geom_point() +
  scale_color_jco() +
  geom_smooth(method = "lm") +  # one linear fit per condition
  theme(panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey")) +
  scale_y_continuous(limits = c(1, 10)) +
  ggtitle("The Impact of Prejudice Type Does Not Differ by Centrality")
## `geom_smooth()` using formula = 'y ~ x'

3.3.2 Prejudice type * centrality interaction on White warmth

The impact of Asian prejudice type (ingroup vs. outgroup) on positive attitude toward Whites does not differ by centrality (i.e., how much Asians value their group membership)

# Scatter + per-condition regression lines: does centrality moderate the
# condition effect on White warmth? Same review fixes as the plot above
# (`limits` spelled out; quoted "lm").
ggplot(data = Bd,
       mapping = aes(x = Centrality,
                     y = White_Warmth, color = Conditions)) +
  xlab("Centrality") +
  ylab("White Warmth") +
  geom_point() +
  scale_color_jco() +
  geom_smooth(method = "lm") +  # one linear fit per condition
  theme(panel.background = element_rect(fill = NA),
        panel.grid.major = element_line(colour = "grey")) +
  scale_y_continuous(limits = c(1, 10)) +
  ggtitle("The Impact of Prejudice Type Does Not Differ by Centrality")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

3.4 Backup plots

These are backup plots for the violin plots

#library(dplyr)
#library(magrittr)

# Keep only the columns used by the backup plots.
Bd1 <- Bd %>% select(Conditions, Asian_Warmth, White_Warmth)


# ggbetweenstats draws violin + box + statistical annotation in one call;
# the namespaced call avoids relying on attach order.
ggstatsplot::ggbetweenstats(
  data = Bd1, 
  x = Conditions, 
  y = Asian_Warmth,
  messages = FALSE) + scale_y_continuous(limit = c(0, 10))+ xlab("Condition") + 
  ylab("Asian Warmth") + scale_color_jco()+theme(
  panel.background = element_rect(fill = NA),
  panel.grid.major = element_line(colour = "grey"))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

# Same backup plot for warmth toward Whites.
ggstatsplot::ggbetweenstats(
  data = Bd1, 
  x = Conditions, 
  y = White_Warmth,
  messages = FALSE) + scale_y_continuous(limit = c(0, 10))+ xlab("Condition") + 
  ylab("White Warmth") + scale_color_jco()+theme(
  panel.background = element_rect(fill = NA),
  panel.grid.major = element_line(colour = "grey"))
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

4 Portfolio 3: Pie and Donut

##donut noninteractive

# Build the donut-chart data: ethnicity counts -> fractions -> stacked
# rectangle coordinates on [0, 1].
donut <- data.frame(
  Race=c("East", "Southeast", "Unknown", "Filipino/Pacific Islander",  "South Asian", "Multiethnic", "Multiracial"),
  count=c(72, 31, 28, 25, 11, 8, 7)
)

# Share of the total sample in each group.
donut$fraction <- donut$count / sum(donut$count)

# Compute the cumulative percentages (top of each rectangle)
donut$ymax <- cumsum(donut$fraction)

# Compute the bottom of each rectangle
donut$ymin <- c(0, head(donut$ymax, n=-1))

# Compute label position
donut$labelPosition <- (donut$ymax + donut$ymin) / 2

# Compute a good label. BUG FIX: the original pasted `donut$category`, a
# column that does not exist (NULL), so every label dropped its group name;
# the column is called `Race`.
donut$label <- paste0(donut$Race, "\n value: ", donut$count)


#donut
# Draw the donut: one rectangle per group stacked on [0, 1], wrapped into a
# ring by polar coordinates; xlim(2, 4) leaves the hole in the middle.
ggplot(donut, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Race)) +
  geom_rect() +
  scale_fill_jco()  + 
  coord_polar(theta="y") +
  xlim(c(2, 4)) +
  theme_void() + ggtitle("Asian Ethnicity")

4.1 bar interactive

# Interactive bar chart of the same counts; ggplotly adds hover tooltips.
p <- ggplot(donut, aes(x = Race, y = count, fill=Race)) +
  geom_col() + scale_fill_jco()  + theme_void() + ggtitle("Asian Ethnicity")

ggplotly(p)

4.2 pie interactive

# JCO-style hex palette for the plotly pie slices.
colors <- c('#0073C2FF',    '#EFC000FF',    '#868686FF',    '#CD534CFF',    
'#7AA6DCFF', '#003C67FF',   '#8F7700FF',    '#3B3B3BFF', '#A73030FF',   '#4A6990FF')



# Interactive pie chart with white slice borders.
fig <- plot_ly(donut, labels = ~Race, values = ~count, type = 'pie', marker = list(colors = colors,
                      line = list(color = '#FFFFFF', width = 1)))

fig <- fig %>% layout(title = 'Asian Ethnicity')

fig

5 Portfolio 4: Table and Correlation Plot

5.1 Nice Table

# Measurement-invariance fit statistics (configural/metric/scalar models).
ta <- read_csv("C:/Users/Colin/Documents/socfit.csv")
## Rows: 3 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Model
## dbl (7): Number of Parameters, Chi-Square, Degrees of Freedom, P-Value, CFI,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Endpoints of the green color ramp used by color_tile() below.
customGreen0 = "#DeF7E9"

customGreen = "#71CA97"

# Formatted fit table: bold model names with a thumbs-up icon; Chi-Square
# shaded light-to-dark green, P-Value shaded dark-to-light green.
formattable(ta, align =c("l","c", "c", "c", "c","c","c","r"), list(
  `Model` = formatter("span", style = ~ formattable::style(color = "black", font.weight = "bold"), x ~ icontext(ifelse(x != "", "thumbs-up", ""), x)), 
  `Chi-Square`= color_tile(customGreen0, customGreen), 
  `P-Value`= color_tile(customGreen, customGreen0)))
Model Number of Parameters Chi-Square Degrees of Freedom P-Value CFI RMESA SRMR
Configural 26 3.95 2 0.14 0.998 0.07 0.01
Metric 23 7.64 5 0.18 0.998 0.05 0.04
Scalar 20 9.19 8 0.33 0.990 0.03 0.05
# Second set of invariance fit statistics, same column layout as socfit.csv.
fi2 <- read_csv("C:/Users/Colin/Documents/semfit.csv")
## Rows: 3 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Model
## dbl (7): Number of Parameters, Chi-Square, Degrees of Freedom, P-Value, CFI,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Same formatted table styling applied to the second fit-statistics set.
formattable(fi2, align =c("l","c", "c", "c", "c","c","c","r"), list(
  `Model` = formatter("span", style = ~ formattable::style(color = "black", font.weight = "bold"), x ~ icontext(ifelse(x != "", "thumbs-up", ""), x)), 
  `Chi-Square`= color_tile(customGreen0, customGreen), 
  `P-Value`= color_tile(customGreen, customGreen0)))
Model Number of Parameters Chi-Square Degrees of Freedom P-Value CFI RMESA SRMR
Configural 26 2.01 2 0.37 1 0.006 0.006
Metric 23 2.89 5 0.72 1 0.000 0.018
Scalar 20 7.10 8 0.53 1 0.000 0.030

5.2 Correlation Matrix

library(ggplot2)
library(corrplot)
## corrplot 0.92 loaded
library(ggstatsplot)
library(ggsci)
library(ggcorrplot)
## 
## Attaching package: 'ggcorrplot'
## The following object is masked from 'package:rstatix':
## 
##     cor_pmat
# Load the belonging dataset and compute the correlation matrix.
co <- read_sav("C:/Users/Colin/Documents/belong2.sav")

# Pairwise-complete correlations so missing values drop per pair, not listwise.
corr <- cor(co, use = "pairwise")


#testRes = cor.mtest(co, conf.level = 0.95)

#corrplot(corr, method="color", col=col(200),  type="upper", order="hclust", addCoef.col = "black", tl.col="black", tl.srt=45, sig.level = 0.01, insig = "blank", diag=FALSE)

# Annotated correlogram with readable variable names; p-values are left
# unadjusted (p.adjust.method = "none").
ggcorrmat(co, cor.vars = c(Belonging, gpa, swl, se, mh, mil_pre, mil_search), 
  cor.vars.names = c(
    "College Belonging",
    "GPA",
    "Life Satisfaction",
    "Self-Esteem",
    "Mental Health",
    "Meaning in Life (Presence)",
    "Meaning in Life (Searching)"), p.adjust.method = "none")

# Diverging color ramp (used by the commented-out corrplot call above).
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))



# NPG-style hex palette (defined for reuse; not passed to ggcorrplot below).
colors <- c('#E64B35FF',    '#4DBBD5FF',    '#00A087FF',    '#3C5488FF',
'#F39B7FFF',    '#8491B4FF', '#91D1C2FF',   '#DC0000FF',    
'#7E6148FF',    '#B09C85FF')



# Upper-triangle correlogram of the full matrix with coefficients printed in
# each tile; non-significant cells are blanked out.
ggcorrplot(corr,
  type = "upper",
  insig = "blank",
  lab = TRUE,
  digits = 2, colors = c("#E69F00", "white", "#009E73"), ggtheme=ggstatsplot::theme_ggstatsplot())

6 Portfolio 5: QCV

This procedure is called quantifying construct validity. Essentially, we asked people to hypothesize the correlations between the definition of belonging and other variables. Then we obtained the actual correlations between our belonging scale and these variables. An index can then be computed based on the mismatch between the hypothesized and actual correlations. This index represents the focal scale’s convergent and discriminant validity. P.S. On the graphs, you cannot see the variable names, but you can tell what the variables are from the code.

6.1 line

library(tidyverse)
library(plotly)
library(ggplot2)
library(ggsci)

# QCV data in long format: Correlation (predicted vs. actual), Variables
# (criterion variable), r (correlation value).
qcv <- read_csv("C:/Users/Colin/Documents/newdata.csv")
## Rows: 32 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Correlation, Variables
## dbl (1): r
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Map the variable abbreviations to ordered numeric codes so the x-axis keeps
# the intended ordering (the full names are in the commented recode below).
qcv$Variables <- recode_factor(qcv$Variables, NTB = "1", GBS = "2", IS = "3", Friend = "4", Grit = "5", Growth = "6", GSE = "7", ACSF = "8", Perf = "9", Lone = "10", HH = "11", Emo = "12", Ext = "13", Agb = "14", Con = "15", Op = "16")

# Line plot: predicted vs. actual correlation profiles across the 16
# criterion variables, one line per correlation type.
p <- ggplot(qcv, aes(x = Variables, y = r, group = Correlation)) + geom_line(aes(color = Correlation)) + geom_point(aes(color = Correlation)) + theme(panel.background = element_rect(fill = "white", colour = "grey50")) + scale_color_futurama() + scale_y_continuous(limits = c(-1, 1)) +xlab("Criterion Variables") + ggtitle("The Pattern of Correlations Across Criterion Variables")

#qcv$Variables <- recode_factor(qcv$Variables, NTB  = "Need to Belong", GBS = "General Belongingness", ACSF = "Academic Contingency of Self-Worth", Agb = "Agreebleness", Emo = "Emotionality", Ext = "Extraversion", HH = "Honesty-Humility", Con = "Conscientiousness", Op = "Openness", Perf = "Perfectionism", Friend = "Friendship", Growth = "Growth Mindset", Lone = "Loneliness", IS = "Interpersonal Support", Grit = "Grit", GSE = "General Self-Efficacy")

# Static version, then the interactive version.
p

ggplotly(p)

6.2 mismatch

# Mismatch plot: a black connecting line per criterion variable plus colored
# points for predicted vs. actual r.
# Fixes two deprecations flagged in the knit output:
#   - `size` for lines was deprecated in ggplot2 3.4.0 -> use `linewidth`;
#   - guides(<scale> = FALSE) was deprecated in ggplot2 3.3.4 -> use "none".
# (size/alpha are still mapped inside aes() as constants, as in the original,
# so their spurious legends must still be suppressed via guides().)
o <- ggplot(qcv, aes(x = Variables, y = r, color = Correlation)) +
  geom_line(color = "black", linewidth = 0.5) +
  geom_point(aes(color = Correlation, size = 0.5, alpha = 0.8)) +
  theme(panel.background = element_rect(fill = "white", colour = "grey50")) +
  scale_color_futurama() +
  scale_y_continuous(limits = c(-1, 1)) +
  ggtitle("The Mismatch Between Predicted and Actual Correlations") +
  guides(alpha = "none", size = "none") +
  xlab("Criterion Variables")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Static figure, then the interactive plotly version.
o

ggplotly(o)

7 Portfolio 6: Power

I found a cool package that can visualize power as long as you provide the testing statistics and df. The red curve shows the null hypothesis, whereas the blue curve shows the alternative hypothesis. I wish I could change the legend theme, but I tried the code that works for ggplot won’t work for this plot.

power.t.test(ncp = -2.6589, df = 179.97, alpha = 0.05, alternative = "not equal") 

##      power ncp.alt ncp.null alpha     df  t.crit.1 t.crit.2
##  0.7532427  -2.659        0  0.05 179.97 -1.973233 1.973233

The statistics for the plot below come from another study in which I ran a planned contrast. Again, this is useful because published papers all have to report these statistics, so we can visualize their power whenever we want.

power.f.test(ncp = 6.49, df1 = 1, df2 = 7257, alpha = 0.05, plot = TRUE)

##      power ncp.alt ncp.null alpha df1  df2   f.crit
##  0.7214843    6.49        0  0.05   1 7257 3.842741

8 Portfolio 7: Ant Colony Optimization

Background: My thesis is a scale development project where in study 1 I used IRT and CFA to select the best 6 belonging items from a pool of items. These are traditional psychometric approaches to creating short scales. But I heard there are modern machine learning algorithms that can do this. Ant Colony Optimization is one of them. So I wanted to see if using this method would result in the same 6 items that we selected using the traditional psychometric methods.

# Item-level belonging data (SPSS file read via haven).
ant <- read_sav("C:/Users/Colin/Documents/ant.sav")

# One-factor CFA template over all 17 candidate items.
# Fix: the original string ended with a dangling "+" after Bi30, which is
# invalid lavaan model syntax; antcolony.lavaan rebuilds candidate models
# from list.items, but the template should still parse cleanly on its own.
antModel <- ' Belong =~ Bi1 + BiBPS + Bi3 + Bi9 + Bi11 + Bi12 + Bi15 + Bi16 + Bi17 + Bi18 + Bi23 + Bi24 + Bi26 + Bi27 + Bi28 + Bi29 + Bi30'

# The same 17 items, as a single-factor item list for the ACO search.
list.items <- list(c(
  "Bi1", "BiBPS", "Bi3", "Bi9", "Bi11", "Bi12", "Bi15", "Bi16", "Bi17",
  "Bi18", "Bi23", "Bi24", "Bi26", "Bi27", "Bi28", "Bi29", "Bi30"
))

# Ant Colony Optimization short-form search: 20 ants per step with pheromone
# evaporation .8, picking 6 of the 17 items for the single Belong factor.
# A candidate model "passes" only when it meets all four fit cutoffs
# (CFI > .95, TLI > .95, RMSEA < .06, SRMR < .08).
abilityShortForm <- antcolony.lavaan(
  data = ant,
  ants = 20, evaporation = 0.8, antModel = antModel,
  list.items = list.items, full = 17, i.per.f = 6,
  factors = "Belong", steps = 50,
  fit.indices = c("cfi", "tli", "rmsea", "srmr"),
  fit.statistics.test = "(cfi > 0.95)&(tli > 0.95)&(rmsea < 0.06)&(srmr<0.08)",
  max.run = 1000
)
## 
 Run number 1 and ant number 1.           
 Run number 1 and ant number 2.           
 Run number 1 and ant number 3.           
 Run number 1 and ant number 4.           
 Run number 1 and ant number 5.           
 Run number 1 and ant number 6.           
 Run number 1 and ant number 7.           
 Run number 1 and ant number 8.           
 Run number 1 and ant number 9.           
 Run number 1 and ant number 10.           
 Run number 1 and ant number 11.           
 Run number 1 and ant number 12.           
 Run number 1 and ant number 13.           
 Run number 1 and ant number 14.           
 Run number 1 and ant number 15.           
 Run number 1 and ant number 16.           
 Run number 1 and ant number 17.           
 Run number 1 and ant number 18.           
 Run number 1 and ant number 19.           
 Run number 1 and ant number 20.           
 Run number 2 and ant number 1.           
 Run number 2 and ant number 2.           
 Run number 2 and ant number 3.           
 Run number 2 and ant number 4.           
 Run number 2 and ant number 5.           
 Run number 2 and ant number 6.           
 Run number 2 and ant number 7.           
 Run number 2 and ant number 8.           
 Run number 2 and ant number 9.           
 Run number 2 and ant number 10.           
 Run number 2 and ant number 11.           
 Run number 2 and ant number 12.           
 Run number 2 and ant number 13.           
 Run number 2 and ant number 14.           
 Run number 2 and ant number 15.           
 Run number 2 and ant number 16.           
 Run number 2 and ant number 17.           
 Run number 2 and ant number 18.           
 Run number 2 and ant number 19.           
 Run number 2 and ant number 20.           
 Run number 3 and ant number 1.           
 Run number 3 and ant number 2.           
 Run number 3 and ant number 3.           
 Run number 3 and ant number 4.           
 Run number 3 and ant number 5.           
 Run number 3 and ant number 6.           
 Run number 3 and ant number 7.           
 Run number 3 and ant number 8.           
 Run number 3 and ant number 9.           
 Run number 3 and ant number 10.           
 Run number 3 and ant number 11.           
 Run number 3 and ant number 12.           
 Run number 3 and ant number 13.           
 Run number 3 and ant number 14.           
 Run number 3 and ant number 15.           
 Run number 3 and ant number 16.           
 Run number 3 and ant number 17.           
 Run number 3 and ant number 18.           
 Run number 3 and ant number 19.           
 Run number 3 and ant number 20.           
 Run number 4 and ant number 1.           
 Run number 4 and ant number 2.           
 Run number 4 and ant number 3.           
 Run number 4 and ant number 4.           
 Run number 4 and ant number 5.           
 Run number 4 and ant number 6.           
 Run number 4 and ant number 7.           
 Run number 4 and ant number 8.           
 Run number 4 and ant number 9.           
 Run number 4 and ant number 10.           
 Run number 4 and ant number 11.           
 Run number 4 and ant number 12.           
 Run number 4 and ant number 13.           
 Run number 4 and ant number 14.           
 Run number 4 and ant number 15.           
 Run number 4 and ant number 16.           
 Run number 4 and ant number 17.           
 Run number 4 and ant number 18.           
 Run number 4 and ant number 19.           
 Run number 4 and ant number 20.           
 Run number 5 and ant number 1.           
 Run number 5 and ant number 2.           
 Run number 5 and ant number 3.           
 Run number 5 and ant number 4.           
 Run number 5 and ant number 5.           
 Run number 5 and ant number 6.           
 Run number 5 and ant number 7.           
 Run number 5 and ant number 8.           
 Run number 5 and ant number 9.           
 Run number 5 and ant number 10.           
 Run number 5 and ant number 11.           
 Run number 5 and ant number 12.           
 Run number 5 and ant number 13.           
 Run number 5 and ant number 14.           
 Run number 5 and ant number 15.           
 Run number 5 and ant number 16.           
 Run number 5 and ant number 17.           
 Run number 5 and ant number 18.           
 Run number 5 and ant number 19.           
 Run number 5 and ant number 20.           
 Run number 6 and ant number 1.           
 Run number 6 and ant number 2.           
 Run number 6 and ant number 3.           
 Run number 6 and ant number 4.           
 Run number 6 and ant number 5.           
 Run number 6 and ant number 6.           
 Run number 6 and ant number 7.           
 Run number 6 and ant number 8.           
 Run number 6 and ant number 9.           
 Run number 6 and ant number 10.           
 Run number 6 and ant number 11.           
 Run number 6 and ant number 12.           
 Run number 6 and ant number 13.           
 Run number 6 and ant number 14.           
 Run number 6 and ant number 15.           
 Run number 6 and ant number 16.           
 Run number 6 and ant number 17.           
 Run number 6 and ant number 18.           
 Run number 6 and ant number 19.           
 Run number 6 and ant number 20.           
 Run number 7 and ant number 1.           
 Run number 7 and ant number 2.           
 Run number 7 and ant number 3.           
 Run number 7 and ant number 4.           
 Run number 7 and ant number 5.           
 Run number 7 and ant number 6.           
 Run number 7 and ant number 7.           
 Run number 7 and ant number 8.           
 Run number 7 and ant number 9.           
 Run number 7 and ant number 10.           
 Run number 7 and ant number 11.           
 Run number 7 and ant number 12.           
 Run number 7 and ant number 13.           
 Run number 7 and ant number 14.           
 Run number 7 and ant number 15.           
 Run number 7 and ant number 16.           
 Run number 7 and ant number 17.           
 Run number 7 and ant number 18.           
 Run number 7 and ant number 19.           
 Run number 7 and ant number 20.           
 Run number 8 and ant number 1.           
 Run number 8 and ant number 2.           
 Run number 8 and ant number 3.           
 Run number 8 and ant number 4.           
 Run number 8 and ant number 5.           
 Run number 8 and ant number 6.           
 Run number 8 and ant number 7.           
 Run number 8 and ant number 8.           
 Run number 8 and ant number 9.           
 Run number 8 and ant number 10.           
 Run number 8 and ant number 11.           
 Run number 8 and ant number 12.           
 Run number 8 and ant number 13.           
 Run number 8 and ant number 14.           
 Run number 8 and ant number 15.           
 Run number 8 and ant number 16.           
 Run number 8 and ant number 17.           
 Run number 8 and ant number 18.           
 Run number 8 and ant number 19.           
 Run number 8 and ant number 20.           
 Run number 9 and ant number 1.           
 Run number 9 and ant number 2.           
 Run number 9 and ant number 3.           
 Run number 9 and ant number 4.           
 Run number 9 and ant number 5.           
 Run number 9 and ant number 6.           
 Run number 9 and ant number 7.           
 Run number 9 and ant number 8.           
 Run number 9 and ant number 9.           
 Run number 9 and ant number 10.           
 Run number 9 and ant number 11.           
 Run number 9 and ant number 12.           
 Run number 9 and ant number 13.           
 Run number 9 and ant number 14.           
 Run number 9 and ant number 15.           
 Run number 9 and ant number 16.           
 Run number 9 and ant number 17.           
 Run number 9 and ant number 18.           
 Run number 9 and ant number 19.           
 Run number 9 and ant number 20.           
 Run number 10 and ant number 1.           
 Run number 10 and ant number 2.           
 Run number 10 and ant number 3.           
 Run number 10 and ant number 4.           
 Run number 10 and ant number 5.           
 Run number 10 and ant number 6.           
 Run number 10 and ant number 7.           
 Run number 10 and ant number 8.           
 Run number 10 and ant number 9.           
 Run number 10 and ant number 10.           
 Run number 10 and ant number 11.           
 Run number 10 and ant number 12.           
 Run number 10 and ant number 13.           
 Run number 10 and ant number 14.           
 Run number 10 and ant number 15.           
 Run number 10 and ant number 16.           
 Run number 10 and ant number 17.           
 Run number 10 and ant number 18.           
 Run number 10 and ant number 19.           
 Run number 10 and ant number 20.           
 Run number 11 and ant number 1.           
 Run number 11 and ant number 2.           
 Run number 11 and ant number 3.           
 Run number 11 and ant number 4.           
 Run number 11 and ant number 5.           
 Run number 11 and ant number 6.           
 Run number 11 and ant number 7.           
 Run number 11 and ant number 8.           
 Run number 11 and ant number 9.           
 Run number 11 and ant number 10.           
 Run number 11 and ant number 11.           
 Run number 11 and ant number 12.           
 Run number 11 and ant number 13.           
 Run number 11 and ant number 14.           
 Run number 11 and ant number 15.           
 Run number 11 and ant number 16.           
 Run number 11 and ant number 17.           
 Run number 11 and ant number 18.           
 Run number 11 and ant number 19.           
 Run number 11 and ant number 20.           
 Run number 12 and ant number 1.           
 Run number 12 and ant number 2.           
 Run number 12 and ant number 3.           
 Run number 12 and ant number 4.           
 Run number 12 and ant number 5.           
 Run number 12 and ant number 6.           
 Run number 12 and ant number 7.           
 Run number 12 and ant number 8.           
 Run number 12 and ant number 9.           
 Run number 12 and ant number 10.           
 Run number 12 and ant number 11.           
 Run number 12 and ant number 12.           
 Run number 12 and ant number 13.           
 Run number 12 and ant number 14.           
 Run number 12 and ant number 15.           
 Run number 12 and ant number 16.           
 Run number 12 and ant number 17.           
 Run number 12 and ant number 18.           
 Run number 12 and ant number 19.           
 Run number 12 and ant number 20.           
 Run number 13 and ant number 1.           
 Run number 13 and ant number 2.           
 Run number 13 and ant number 3.           
 Run number 13 and ant number 4.           
 Run number 13 and ant number 5.           
 Run number 13 and ant number 6.           
 Run number 13 and ant number 7.           
 Run number 13 and ant number 8.           
 Run number 13 and ant number 9.           
 Run number 13 and ant number 10.           
 Run number 13 and ant number 11.           
 Run number 13 and ant number 12.           
 Run number 13 and ant number 13.           
 Run number 13 and ant number 14.           
 Run number 13 and ant number 15.           
 Run number 13 and ant number 16.           
 Run number 13 and ant number 17.           
 Run number 13 and ant number 18.           
 Run number 13 and ant number 19.           
 Run number 13 and ant number 20.           
 Run number 14 and ant number 1.           
 Run number 14 and ant number 2.           
 Run number 14 and ant number 3.           
 Run number 14 and ant number 4.           
 Run number 14 and ant number 5.           
 Run number 14 and ant number 6.           
 Run number 14 and ant number 7.           
 Run number 14 and ant number 8.           
 Run number 14 and ant number 9.           
 Run number 14 and ant number 10.           
 Run number 14 and ant number 11.           
 Run number 14 and ant number 12.           
 Run number 14 and ant number 13.           
 Run number 14 and ant number 14.           
 Run number 14 and ant number 15.           
 Run number 14 and ant number 16.           
 Run number 14 and ant number 17.           
 Run number 14 and ant number 18.           
 Run number 14 and ant number 19.           
 Run number 14 and ant number 20.           
 Run number 15 and ant number 1.           
 Run number 15 and ant number 2.           
 Run number 15 and ant number 3.           
 Run number 15 and ant number 4.           
 Run number 15 and ant number 5.           
 Run number 15 and ant number 6.           
 Run number 15 and ant number 7.           
 Run number 15 and ant number 8.           
 Run number 15 and ant number 9.           
 Run number 15 and ant number 10.           
 Run number 15 and ant number 11.           
 Run number 15 and ant number 12.           
 Run number 15 and ant number 13.           
 Run number 15 and ant number 14.           
 Run number 15 and ant number 15.           
 Run number 15 and ant number 16.           
 Run number 15 and ant number 17.           
 Run number 15 and ant number 18.           
 Run number 15 and ant number 19.           
 Run number 15 and ant number 20.           [1] "Compiling results."
abilityShortForm
## Algorithm: Ant Colony Optimization
## Total Run Time: 23.372 secs
## 
## Function call:
## antcolony.lavaan(data = ant, ants = 20, evaporation = 0.8, antModel = antModel,
##   list.items = list.items, full = 17, i.per.f = 6, factors = "Belong", steps
##   = 50, fit.indices = c("cfi", "tli", "rmsea", "srmr"), fit.statistics.test =
##   "(cfi > 0.95)&(tli > 0.95)&(rmsea < 0.06)&(srmr<0.08)", max.run = 1000)
## 
## Final Model Syntax:
## Belong =~ BiBPS + Bi17 + Bi15 + Bi11 + Bi12 + Bi1
plot(abilityShortForm, type = 'pheromone')

Conclusion: I still don’t quite understand how to set up all the parameters, and I am not sure whether this method is superior to traditional psychometric approaches. But 5 out of the resulting 6 items are the same as what we got by using CFA &amp; IRT.

9 Portfolio 8: Histogram and Density

Background: I’m told that it is helpful to look at each item’s histogram when creating a scale.

9.1 All items at a time via psych

# Data for the four belonging items (SPSS file read via haven).
Be <- read_sav("C:/Users/Colin/Documents/PO8.sav")

# Histograms of all numeric columns at once via psych::multi.hist.
# vapply() replaces sapply() so the column mask is guaranteed to be a
# logical vector regardless of input (sapply's return type is input-dependent).
multi.hist(
  Be[, vapply(Be, is.numeric, logical(1))],
  freq = TRUE, bcol = "#79af97", breaks = 15,
  main = c("Item 1", "Item 2", "Item 3 (R)", "Item 4")
)

9.2 All items separately

ggplot(Be, aes(x = Be1)) + geom_histogram(fill = "#79af97", color = "black") + theme_classic() + labs(title = "Item 1") + theme(plot.title = element_text(face = "bold", hjust = 0.5), axis.title.x = element_blank(), axis.title.y = element_blank()) 
## Don't know how to automatically pick scale for object of type
## <haven_labelled/vctrs_vctr/double>. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(Be, aes(x = Be2)) + geom_histogram(fill = "#79af97", color = "black") + theme_classic() + labs(title = "Item 2", xlab = "") + theme(plot.title = element_text(face = "bold", hjust = 0.5), axis.title.x = element_blank(), axis.title.y = element_blank()) 
## Don't know how to automatically pick scale for object of type
## <haven_labelled/vctrs_vctr/double>. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(Be, aes(x = BeR)) + geom_histogram(fill = "#79af97", color = "black") + theme_classic() + labs(title = "Item 3 (R)", xlab = "") + theme(plot.title = element_text(face = "bold", hjust = 0.5), axis.title.x = element_blank(), axis.title.y = element_blank()) 
## Don't know how to automatically pick scale for object of type
## <haven_labelled/vctrs_vctr/double>. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(Be, aes(x = Be3)) + geom_histogram(fill = "#79af97", color = "black") + theme_classic() + labs(title = "Item 4", xlab = "") + theme(plot.title = element_text(face = "bold", hjust = 0.5), axis.title.x = element_blank(), axis.title.y = element_blank()) 
## Don't know how to automatically pick scale for object of type
## <haven_labelled/vctrs_vctr/double>. Defaulting to continuous.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

9.3 All items at a time via ggplot

# Reshape to long format (one row per item response) for faceting.
# tidyr::pivot_longer replaces the reshape2::melt call: the knit output
# warned that data.table's melt redirect to reshape2 is deprecated.
# values_transform drops the haven_labelled attributes explicitly (melt
# dropped them with an "attributes are not identical" warning).
Be_A <- pivot_longer(
  Be,
  cols = c(Be1, Be2, BeR, Be3),
  names_to = "Belong",
  values_to = "Score",
  values_transform = list(Score = as.numeric)
)
## Warning in melt(Be, measure.vars = c("Be1", "Be2", "BeR", "Be3"), variable.name
## = "Belong", : The melt generic in data.table has been passed a tbl_df and will
## attempt to redirect to the relevant reshape2 method; please note that reshape2
## is deprecated, and this redirection is now deprecated as well. To continue
## using melt methods from reshape2 while both libraries are attached, e.g.
## melt.list, you can prepend the namespace like reshape2::melt(Be). In the next
## version, this warning will become an error.
## Warning: attributes are not identical across measure variables; they will be
## dropped
# Relabel the item identifiers so the facet strips read "Item 1" ... "Item 4".
Be_A$Belong <- factor(
  Be_A$Belong,
  levels = c("Be1", "Be2", "BeR", "Be3"),
  labels = c("Item 1", "Item 2", "Item 3 (R)", "Item 4")
)

# Faceted histograms: one panel per item, shared y scale.
ggplot(Be_A, aes(x = Score)) +
  geom_histogram(fill = "#79af97", color = "black") +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  ) +
  facet_wrap(~ Belong) +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(size = 10, face = "bold")
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

9.4 Density plot

# Ridgeline density per item, filled by x position.
# Fix: after_stat(x) replaces stat(x), which the knit output flagged as
# deprecated since ggplot2 3.4.0.
ggplot(Be_A, aes(x = Score, y = Belong, fill = after_stat(x))) +
  geom_density_ridges_gradient(bandwidth = 0.4) +
  scale_fill_gradient(name = "Range") +
  coord_cartesian(clip = "off") +
  theme_classic() +
  scale_x_continuous(limit = c(1, 5))
## Warning: `stat(x)` was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(x)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

It’s cool to have all the density plots together, but then I feel like the continuous scale is kinda useless, i.e., the color change means nothing really. I was also having trouble naming the legend.

# Viridis-filled ridgelines with the legend pinned to the 1-5 response range.
# Fix: after_stat(x) replaces the deprecated stat(x).
ggplot(Be_A, aes(x = Score, y = Belong, fill = after_stat(x))) +
  geom_density_ridges_gradient(bandwidth = 0.4) +
  scale_fill_viridis_c(name = "Range", limits = c(1, 5), breaks = seq(1, 5, by = 1)) +
  coord_cartesian(clip = "off") +
  theme_classic() +
  scale_x_continuous(limit = c(1, 5)) +
  labs(title = "Data Distribution of A Short College Belonging Scale") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

I really like the final look.

# Single-color ridgelines.  Fix: the original mapped fill = Belong inside
# aes() and then immediately overrode it with a literal fill in the geom;
# the redundant mapping is dropped (rendered output is unchanged).
ggplot(Be_A, aes(x = Score, y = Belong)) +
  geom_density_ridges(fill = "#79af97", alpha = 0.5, bandwidth = 0.4) +
  theme_classic() +
  scale_x_continuous(limit = c(1, 5)) +
  labs(title = "Data Distribution of A Short College Belonging Scale") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

10 Portfolio 9: Dot and Whisker/Forest Plot

People use multiple regression all the time. I always had a hard time plotting regression results and did not know another way to report it besides using tables. Then I discovered the Dot-and-Whisker Plot.

10.1 Building Models

# Covid survey data (SPSS file read via haven).
reg <- read_sav("C:/Users/Colin/Documents/reg.sav")

# Fear of Covid regressed on county case %, county death %, covid-related
# life events, race-related covid risk, and covid-induced racism.
fear <- lm(fe ~ case_p + death_p + cv13 + risk_r + covrac, data = reg)

summary(fear)
## 
## Call:
## lm(formula = fe ~ case_p + death_p + cv13 + risk_r + covrac, 
##     data = reg)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.0841 -0.7899 -0.1063  0.6567  2.4585 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.48008    0.27325   5.417 1.56e-07 ***
## case_p      -0.21567    0.10142  -2.126   0.0346 *  
## death_p      2.71570    0.85484   3.177   0.0017 ** 
## cv13         0.13929    0.02779   5.012 1.09e-06 ***
## risk_r       0.31327    0.05291   5.921 1.19e-08 ***
## covrac       0.07679    0.04158   1.847   0.0661 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1 on 225 degrees of freedom
##   (15 observations deleted due to missingness)
## Multiple R-squared:  0.3374, Adjusted R-squared:  0.3226 
## F-statistic: 22.91 on 5 and 225 DF,  p-value: < 2.2e-16
# Same predictor set for the other two outcomes: psychological distress and
# psychological health change.
distress <- lm(dis ~ case_p + death_p + cv13 + risk_r + covrac , data = reg)

psyh <- lm(psyh ~ case_p + death_p + cv13 + risk_r + covrac, data = reg)

10.2 Preliminary Plot

# Dot-and-whisker plot of the three unstandardized models: dots are point
# estimates, whiskers are 95% CIs, with a dashed reference line at zero.
# Predictors are relabeled to readable names; the legend is placed inside
# the panel near the bottom center.
dwplot(list(fear, distress, psyh), vline = geom_vline(
           xintercept = 0,
           colour = "grey60",
           linetype = 2
       ),
       vars_order = c("case_p", "death_p", "cv13", "risk_r", "covrac")) %>% 
  relabel_predictors(
        c(
            case_p = "County Covid Case %",
            death_p = "County Death Case %",
            cv13 = "Covid-Related Life Events",
            risk_r = "Race-Related Covid Risk",
           covrac = "Covid-Induced Racism"
        )) + theme_classic() + scale_color_aaas(name = "Outcome Variables",
        labels = c("Fear of Covid", "Psychological Distress", "Psychological Health Change")) + labs(title = "Predicting Negative Psychological Outcomes") + theme(plot.title = element_text(face = "bold", hjust = 0.5),  legend.position = c(0.5, 0.1),
        legend.justification = c(0, 0),
        legend.background = element_rect(colour = "grey80"),
        legend.title.align = .5
    )

The dots are the estimates, and the lines indicate 95% confidence intervals. It might be easiest to use the covid-death percentage as an example: we can see that death % predicts fear of covid but not the other two outcomes, because their confidence intervals include 0. Obviously there are still problems. I wish the lines and dots could be thicker. I also hope that I can plot the standardized coefficients so the death % doesn’t look so dramatic.

10.3 Manually Standardize Variables

# Keep only rows with observed county case/death rates, then z-score every
# predictor and outcome.  as.numeric(scale(x)) computes the same
# (x - mean) / sd transformation (scale() uses the same n-1 sd) without
# repeating the formula eight times, and is additionally NA-robust (scale()
# centers/scales with na.rm = TRUE, so a stray NA no longer wipes out the
# whole column).  as.numeric() drops scale()'s 1-column-matrix shape so the
# variables behave exactly like the hand-computed originals in lm().
reg <- reg[complete.cases(reg[, c("case_p", "death_p")]), ]

case_p.z  <- as.numeric(scale(reg$case_p))

death_p.z <- as.numeric(scale(reg$death_p))

cv13.z    <- as.numeric(scale(reg$cv13))

risk_r.z  <- as.numeric(scale(reg$risk_r))

covrac.z  <- as.numeric(scale(reg$covrac))

fe.z      <- as.numeric(scale(reg$fe))

dis.z     <- as.numeric(scale(reg$dis))

psyh.z    <- as.numeric(scale(reg$psyh))

10.4 Dot and Whisker

# Refit the three models on the standardized variables so coefficients are
# comparable across predictors.
# NOTE(review): the assignment below overwrites the psyh.z *vector* with the
# fitted model object of the same name.  It works because the formula is
# evaluated before the assignment completes, but renaming the model (e.g.
# psyh_fit.z) would be safer.
fear.z <- lm(fe.z ~ case_p.z + death_p.z + cv13.z + risk_r.z + covrac.z, data = reg)

distress.z <- lm(dis.z ~ case_p.z + death_p.z + cv13.z + risk_r.z + covrac.z , data = reg)

psyh.z <- lm(psyh.z ~ case_p.z + death_p.z + cv13.z + risk_r.z + covrac.z, data = reg)

# Standardized dot-and-whisker plot with thicker whiskers and larger dots.
# Fix: the `width = 0.2` entry was removed from whisker_args - whiskers are
# drawn with geom_segment(), which has no width parameter, so the knit
# output warned "Ignoring unknown parameters: `width`".
dwplot(list(fear.z, distress.z, psyh.z),
       vline = geom_vline(
           xintercept = 0,
           colour = "grey60",
           linetype = 2
       ),
       whisker_args = list(size = 1),
       dot_args = list(size = 3),
       vars_order = c("case_p.z", "death_p.z", "cv13.z", "risk_r.z", "covrac.z")) %>%
  relabel_predictors(
        c(
            case_p.z = "County Covid Case %",
            death_p.z = "County Death Case %",
            cv13.z = "Covid-Related Life Events",
            risk_r.z = "Race-Related Covid Risk",
            covrac.z = "Covid-Induced Racism"
        )) +
  theme_classic() +
  scale_color_aaas(
    name = "Outcomes",
    labels = c("Fear of Covid", "Psychological Distress", "Psychological Health Change")
  ) +
  labs(title = "Predicting Negative Psychological Health Outcomes") +
  xlab("Standardized Estimates") +
  theme(plot.title = element_text(face = "bold", hjust = 0.5),
        legend.position = "top",
        legend.text = element_text(size = 8))
## Warning in geom_dw(df = df, point_args = point_args, segment_args = segment_args, : Ignoring unknown parameters: `width`
## Ignoring unknown parameters: `width`

10.5 Same plot via jtools

# Hex palette for plot_summs() below.  NOTE(review): labeled "lancet" but
# the final color matches the NPG tan used earlier - confirm intent.
lancet <- c(
  "#00468bff", "#ED0000FF", "#42B540FF", "#0099B4FF", "#925E9FFF",
  "#FDAF91FF", "#AD002AFF", "#ADB6B6FF", "#1B1919FF", "#B09C85FF"
)



# Same dot-and-whisker comparison via jtools::plot_summs, with readable
# coefficient names and the custom palette defined above.
plot_summs(
  fear.z, distress.z, psyh.z,
  model.names = c("Fear of Covid", "Psychological Distress",
                  "Psychological Health Change"),
  coefs = c(
    "County Covid Case %"       = "case_p.z",
    "County Death Case %"       = "death_p.z",
    "Covid-Related Life Events" = "cv13.z",
    "Race-Related Covid Risk"   = "risk_r.z",
    "Covid-Induced Racism"      = "covrac.z"
  ),
  legend.title = "Outcomes",
  colors = lancet,
  point.shape = FALSE,
  point.size = 6
) +
  labs(title = "Predicting Negative Psychological Health Outcomes") +
  xlab("Standardized Estimates") +
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.y = element_blank(),
    axis.title.x = element_text(face = "bold"),
    legend.position = "top",
    legend.text = element_text(size = 8)
  )
## Loading required namespace: broom.mixed
## Loading required namespace: broom.mixed
## Loading required namespace: broom.mixed

I prefer the dotwhisker package to jtools only because it provides more customization. For example, here I cannot really change the whisker size or dot shape. But given that both of these plotting functions are based on ggplot2, creating the plots directly in ggplot2 might provide the most customization.

11 Portfolio 10: Logistic and Path

My last portfolio. I thought I wanted to do something epic like rayshader. But I don’t have the appropriate data. And I feel like even if I made it, I won’t be able to use the code everyday because something like that is not very common and it would end up being less useful. So I decided to do logistic regression and path model for an upcoming project I’m planning to submit to SPSP.

11.1 Testing Predictor Correlations

# Load ICPSR study 37166, dataset 7 (creates the da37166.0007 data frame).
load(file='C:/Users/Colin/Downloads/ICPSR_37166-V2 (1)/ICPSR_37166/DS0007/37166-0007-Data.rda')



# Pairwise correlations among the six Wave 1 predictors (pairwise-complete
# observations, Holm-adjusted p values per the printed summary).
W1<-correlation(da37166.0007[c("W1EVERYDAY", "W1SOCSUPPORT", "W1CONNECTEDNESS", "W1INTERNALIZED", "W1FELTSTIGMA", "W1IDCENTRAL")], use = "pairwise")

summary(W1)
## # Correlation Matrix (pearson-method)
## 
## Parameter       | W1IDCENTRAL | W1FELTSTIGMA | W1INTERNALIZED | W1CONNECTEDNESS | W1SOCSUPPORT
## ----------------------------------------------------------------------------------------------
## W1EVERYDAY      |        0.05 |      0.29*** |        0.16*** |            0.05 |     -0.19***
## W1SOCSUPPORT    |        0.03 |     -0.23*** |       -0.19*** |         0.16*** |             
## W1CONNECTEDNESS |     0.47*** |        -0.05 |       -0.20*** |                 |             
## W1INTERNALIZED  |    -0.14*** |      0.20*** |                |                 |             
## W1FELTSTIGMA    |   -3.00e-03 |              |                |                 |             
## 
## p-value adjustment method: Holm (1979)
# Same six predictors at Wave 2.
W2<-correlation(da37166.0007[c("W2EVERYDAY", "W2SOCSUPPORT", "W2CONNECTEDNESS", "W2INTERNALIZED", "W2FELTSTIGMA", "W2IDCENTRAL")], use = "pairwise")

summary(W2)
## # Correlation Matrix (pearson-method)
## 
## Parameter       | W2IDCENTRAL | W2FELTSTIGMA | W2INTERNALIZED | W2CONNECTEDNESS | W2SOCSUPPORT
## ----------------------------------------------------------------------------------------------
## W2EVERYDAY      |        0.06 |      0.24*** |        0.15*** |            0.04 |     -0.21***
## W2SOCSUPPORT    |       0.09* |     -0.24*** |       -0.20*** |         0.22*** |             
## W2CONNECTEDNESS |     0.53*** |      -0.12** |       -0.22*** |                 |             
## W2INTERNALIZED  |    -0.20*** |      0.18*** |                |                 |             
## W2FELTSTIGMA    |   -7.30e-03 |              |                |                 |             
## 
## p-value adjustment method: Holm (1979)
# Same six predictors at Wave 3.
W3<-correlation(da37166.0007[c("W3EVERYDAY", "W3SOCSUPPORT", "W3CONNECTEDNESS", "W3INTERNALIZED", "W3FELTSTIGMA", "W3IDCENTRAL")], use = "pairwise")

summary(W3)
## # Correlation Matrix (pearson-method)
## 
## Parameter       | W3IDCENTRAL | W3FELTSTIGMA | W3INTERNALIZED | W3CONNECTEDNESS | W3SOCSUPPORT
## ----------------------------------------------------------------------------------------------
## W3EVERYDAY      |        0.08 |      0.24*** |        0.15*** |            0.01 |     -0.20***
## W3SOCSUPPORT    |        0.08 |     -0.18*** |       -0.17*** |         0.19*** |             
## W3CONNECTEDNESS |     0.55*** |       -0.11* |       -0.25*** |                 |             
## W3INTERNALIZED  |    -0.18*** |      0.23*** |                |                 |             
## W3FELTSTIGMA    |   -2.85e-03 |              |                |                 |             
## 
## p-value adjustment method: Holm (1979)
# Wave 1 predictor correlation heatmap.  Fixes: guides(fill = FALSE) is
# deprecated in ggplot2 (use "none"), and the "Disrimination" label typo.
# The scale_fill_gradient2 call intentionally replaces the plot's default
# fill scale (hence the "replacing existing scale" message in the output).
plot(summary(correlation(
  data = da37166.0007[c("W1EVERYDAY", "W1SOCSUPPORT", "W1CONNECTEDNESS",
                        "W1INTERNALIZED", "W1FELTSTIGMA", "W1IDCENTRAL")],
  rename = c("Everyday Discrimination", "Social Support",
             "Community Connectedness", "Internalized Homophobia",
             "Felt Stigma", "Identity Centrality")
))) +
  theme_classic() +
  theme(axis.text.x = element_text(vjust = 0.5, angle = 15),
        plot.title = element_text(face = "bold", hjust = 0.5)) +
  labs(title = "Wave 1 Predictor Correlations") +
  scale_fill_gradient2(low = "#E64B35FF", mid = "white", high = "#4DBBD5FF") +
  guides(fill = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

# Wave 2 predictor correlation heatmap.  Fixes: guides(fill = FALSE) is
# deprecated in ggplot2 (use "none"), and the "Disrimination" label typo.
plot(summary(correlation(
  data = da37166.0007[c("W2EVERYDAY", "W2SOCSUPPORT", "W2CONNECTEDNESS",
                        "W2INTERNALIZED", "W2FELTSTIGMA", "W2IDCENTRAL")],
  rename = c("Everyday Discrimination", "Social Support",
             "Community Connectedness", "Internalized Homophobia",
             "Felt Stigma", "Identity Centrality")
))) +
  theme_classic() +
  theme(axis.text.x = element_text(vjust = 0.5, angle = 15),
        plot.title = element_text(face = "bold", hjust = 0.5)) +
  labs(title = "Wave 2 Predictor Correlations") +
  scale_fill_gradient2(low = "#E64B35FF", mid = "white", high = "#4DBBD5FF") +
  guides(fill = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

# Wave 3 predictor correlation heatmap.  Fixes: guides(fill = FALSE) is
# deprecated in ggplot2 (use "none"), and the "Disrimination" label typo.
plot(summary(correlation(
  data = da37166.0007[c("W3EVERYDAY", "W3SOCSUPPORT", "W3CONNECTEDNESS",
                        "W3INTERNALIZED", "W3FELTSTIGMA", "W3IDCENTRAL")],
  rename = c("Everyday Discrimination", "Social Support",
             "Community Connectedness", "Internalized Homophobia",
             "Felt Stigma", "Identity Centrality")
))) +
  theme_classic() +
  theme(axis.text.x = element_text(vjust = 0.5, angle = 15),
        plot.title = element_text(face = "bold", hjust = 0.5)) +
  labs(title = "Wave 3 Predictor Correlations") +
  scale_fill_gradient2(low = "#E64B35FF", mid = "white", high = "#4DBBD5FF") +
  guides(fill = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

Based on the correlation results, perhaps I should take out community connectedness or identity centrality from the regression model, given how strongly they correlate with each other.

11.2 Logistic Regression

#logR<-glmer(W3Q29D ~ W2EVERYDAY + W3EVERYDAY + W2SOCSUPPORT + W3SOCSUPPORT + W2CONNECTEDNESS + W3CONNECTEDNESS + W2INTERNALIZED + W3INTERNALIZED + W2FELTSTIGMA + W3FELTSTIGMA + W2IDCENTRAL + W3IDCENTRAL + (1| STUDYID), data = da37166.0007, family = binomial()) # mixed-effects model; not sure it is needed
#summary(logR)

# Exploratory single-predictor logistic regression: Wave 1 identity centrality
# predicting Wave 2 coming out (W2Q29D). The bare call prints only the
# coefficients and deviance, not the full summary.
glm(W2Q29D ~ W1IDCENTRAL, data =da37166.0007, family = binomial)
## 
## Call:  glm(formula = W2Q29D ~ W1IDCENTRAL, family = binomial, data = da37166.0007)
## 
## Coefficients:
## (Intercept)  W1IDCENTRAL  
##     0.39548      0.09789  
## 
## Degrees of Freedom: 867 Total (i.e. Null);  866 Residual
##   (650 observations deleted due to missingness)
## Null Deviance:       1081 
## Residual Deviance: 1079  AIC: 1083
# Multivariable logistic regression: the five Wave 1 predictors (identity
# centrality excluded, per the correlation note above) predicting Wave 2
# coming out.
logR_wave1<-glm(W2Q29D ~ W1EVERYDAY  + W1SOCSUPPORT  + W1CONNECTEDNESS  + W1INTERNALIZED  + W1FELTSTIGMA, data =da37166.0007, family = binomial)

# Full summary: coefficient estimates, SEs, z tests, deviance and AIC.
summary(logR_wave1)
## 
## Call:
## glm(formula = W2Q29D ~ W1EVERYDAY + W1SOCSUPPORT + W1CONNECTEDNESS + 
##     W1INTERNALIZED + W1FELTSTIGMA, family = binomial, data = da37166.0007)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8736  -1.3791   0.7815   0.8743   1.3098  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      2.78742    0.64865   4.297 1.73e-05 ***
## W1EVERYDAY      -0.30218    0.12438  -2.429   0.0151 *  
## W1SOCSUPPORT    -0.07573    0.06349  -1.193   0.2329    
## W1CONNECTEDNESS -0.13612    0.14517  -0.938   0.3484    
## W1INTERNALIZED  -0.13904    0.10881  -1.278   0.2013    
## W1FELTSTIGMA    -0.15957    0.08597  -1.856   0.0634 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 999.89  on 803  degrees of freedom
## Residual deviance: 982.76  on 798  degrees of freedom
##   (714 observations deleted due to missingness)
## AIC: 994.76
## 
## Number of Fisher Scoring iterations: 4
# Multivariable logistic regression: the five Wave 2 predictors (identity
# centrality excluded, as in the Wave 1 model) predicting Wave 3 coming out.
logR_wave2<-glm(W3Q29D ~ W2EVERYDAY  + W2SOCSUPPORT  + W2CONNECTEDNESS  + W2INTERNALIZED  + W2FELTSTIGMA, data =da37166.0007, family = binomial)

# Print the stored fit. The original re-ran the identical glm() call here,
# fitting the same model a second time just to print it; printing the saved
# object produces the same console output without the redundant computation.
logR_wave2
## 
## Call:  glm(formula = W3Q29D ~ W2EVERYDAY + W2SOCSUPPORT + W2CONNECTEDNESS + 
##     W2INTERNALIZED + W2FELTSTIGMA, family = binomial, data = da37166.0007)
## 
## Coefficients:
##     (Intercept)       W2EVERYDAY     W2SOCSUPPORT  W2CONNECTEDNESS  
##          2.1174          -0.4376           0.1038          -0.1181  
##  W2INTERNALIZED     W2FELTSTIGMA  
##         -0.1199          -0.1304  
## 
## Degrees of Freedom: 564 Total (i.e. Null);  559 Residual
##   (953 observations deleted due to missingness)
## Null Deviance:       654 
## Residual Deviance: 635.3     AIC: 647.3
summary(logR_wave2)
## 
## Call:
## glm(formula = W3Q29D ~ W2EVERYDAY + W2SOCSUPPORT + W2CONNECTEDNESS + 
##     W2INTERNALIZED + W2FELTSTIGMA, family = binomial, data = da37166.0007)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9696  -1.2228   0.6892   0.8028   1.2182  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)   
## (Intercept)      2.11739    0.81915   2.585  0.00974 **
## W2EVERYDAY      -0.43756    0.15024  -2.912  0.00359 **
## W2SOCSUPPORT     0.10380    0.08314   1.249  0.21184   
## W2CONNECTEDNESS -0.11808    0.18341  -0.644  0.51968   
## W2INTERNALIZED  -0.11992    0.14209  -0.844  0.39868   
## W2FELTSTIGMA    -0.13043    0.10910  -1.196  0.23185   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 653.95  on 564  degrees of freedom
## Residual deviance: 635.27  on 559  degrees of freedom
##   (953 observations deleted due to missingness)
## AIC: 647.27
## 
## Number of Fisher Scoring iterations: 4
# Cross-sectional logistic regression: Wave 3 predictors on the Wave 3
# coming-out outcome (see the caveat noted after the summary output).
logR_wave3<-glm(W3Q29D ~ W3EVERYDAY  + W3SOCSUPPORT  + W3CONNECTEDNESS  + W3INTERNALIZED  + W3FELTSTIGMA, data =da37166.0007, family = binomial)

summary(logR_wave3)
## 
## Call:
## glm(formula = W3Q29D ~ W3EVERYDAY + W3SOCSUPPORT + W3CONNECTEDNESS + 
##     W3INTERNALIZED + W3FELTSTIGMA, family = binomial, data = da37166.0007)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9508  -1.3034   0.6971   0.8059   1.2119  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      3.02465    0.76674   3.945 7.99e-05 ***
## W3EVERYDAY      -0.40065    0.14276  -2.806  0.00501 ** 
## W3SOCSUPPORT    -0.00951    0.07930  -0.120  0.90454    
## W3CONNECTEDNESS -0.22417    0.16914  -1.325  0.18505    
## W3INTERNALIZED  -0.26150    0.12976  -2.015  0.04388 *  
## W3FELTSTIGMA    -0.09566    0.09724  -0.984  0.32524    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 752.73  on 645  degrees of freedom
## Residual deviance: 733.99  on 640  degrees of freedom
##   (872 observations deleted due to missingness)
## AIC: 745.99
## 
## Number of Fisher Scoring iterations: 4
#Not sure if using wave 3 data to predict wave 3 outcome is legit

# Forest plot of the Wave 1 model on the log-odds scale (transform = NULL;
# the sjPlot default would exponentiate to odds ratios).
# group.terms = c(1, 2, 2, 2, 2) colors W1EVERYDAY separately from the other
# four terms (it is the only p < .05 predictor in summary(logR_wave1)).
# NOTE(review): scale_x_discrete() labels are assigned positionally and are
# listed in reverse term order -- confirm they line up with the plotted
# coefficients.
plot_model(logR_wave1, group.terms = c(1, 2, 2, 2, 2), colors=c("#4DBBD5FF","#E64B35FF"), show.values = TRUE, value.offset = .3, vline.color = "#1B191999", title = "Using Wave 1 Predictors to Predict Coming Out in Wave 2", type ="est", transform = NULL) + theme_classic() + theme(plot.title = element_text(face = "bold", hjust = 0.5)) + scale_x_discrete(labels=c("Felt Stigma", "Internalized Homophobia", "Community Connectedness","Social Support", "Everyday Discrimination"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

# Forest plot of the Wave 2 model on the log-odds scale (transform = NULL).
# group.terms = c(1, 2, 2, 2, 2) colors W2EVERYDAY separately from the other
# four terms (the only p < .05 predictor in summary(logR_wave2)).
# NOTE(review): scale_x_discrete() labels are positional (reverse term
# order) -- confirm they line up with the plotted coefficients.
plot_model(logR_wave2, group.terms = c(1, 2, 2, 2, 2), colors=c("#4DBBD5FF","#E64B35FF"), show.values = TRUE, value.offset = .3, vline.color = "#1B191999", title = "Using Wave 2 Predictors to Predict Coming Out in Wave 3", type ="est", transform = NULL) + theme_classic() + theme(plot.title = element_text(face = "bold", hjust = 0.5)) + scale_x_discrete(labels=c("Felt Stigma", "Internalized Homophobia", "Community Connectedness","Social Support", "Everyday Discrimination"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

# Forest plot of the Wave 3 cross-sectional model on the log-odds scale.
# group.terms = c(1, 2, 2, 1, 2) groups W3EVERYDAY and W3INTERNALIZED together
# (both p < .05 in summary(logR_wave3)) against the remaining three terms.
# NOTE(review): scale_x_discrete() labels are positional (reverse term
# order) -- confirm they line up with the plotted coefficients.
plot_model(logR_wave3, group.terms = c(1, 2, 2, 1, 2), colors=c("#4DBBD5FF","#E64B35FF"), show.values = TRUE, value.offset = .3, vline.color = "#1B191999", title = "Using Wave 3 Predictors to Predict Coming Out in Wave 3", type ="est", transform = NULL) + theme_classic() + theme(plot.title = element_text(face = "bold", hjust = 0.5)) + scale_x_discrete(labels=c("Felt Stigma", "Internalized Homophobia", "Community Connectedness","Social Support", "Everyday Discrimination"))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.

I changed all the x-axes to log-odds; by default plot_model shows odds ratios, and I don't quite see the benefit of that here. The outcome was coded as (1) yes, (2) no, and greater everyday-discrimination scores mean fewer encounters with discrimination. So the negative coefficient for everyday discrimination means that more daily discrimination predicts being less likely to come out (equivalently, less discrimination predicts being more likely to come out).

11.3 Path Models

# lavaan model syntax for path model 1 (Wave 1 predictors -> Wave 2 outcome):
#   - W2Q29D (coming out) regressed on all five Wave 1 predictors
#   - internalized homophobia regressed on felt stigma and everyday discrimination
#   - social support regressed on community connectedness
path1 <-'W2Q29D ~ W1EVERYDAY  + W1SOCSUPPORT  + W1CONNECTEDNESS  + W1INTERNALIZED  + W1FELTSTIGMA

W1INTERNALIZED ~ W1FELTSTIGMA + W1EVERYDAY

W1SOCSUPPORT ~ W1CONNECTEDNESS

'

# Recode the coming-out outcome from its factor labels ("(1) Yes" / "(2) No")
# to a numeric 0/1 indicator so lavaan can use it as an observed variable.
# Guarded with is.numeric(): the original recode was not idempotent -- once
# W2Q29D is numeric, re-running the comparison against "(1) Yes" would turn
# every value into 0 or NA.
if (!is.numeric(da37166.0007$W2Q29D)) {
  da37166.0007$W2Q29D <- as.numeric(da37166.0007$W2Q29D == "(1) Yes")
}


# Fit path model 1 with lavaan (default ML estimator, listwise deletion).
# NOTE(review): W2Q29D is a 0/1 variable but is modeled as continuous here;
# confirm whether a categorical estimator (e.g. ordered = "W2Q29D") was
# intended.
fit1<-sem(path1, data = da37166.0007)

summary(fit1, fit.measures=TRUE, standardized=T)
## lavaan 0.6.15 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
## 
##                                                   Used       Total
##   Number of observations                           804        1518
## 
## Model Test User Model:
##                                                       
##   Test statistic                                66.478
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               139.610
##   Degrees of freedom                                12
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.510
##   Tucker-Lewis Index (TLI)                      -0.469
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -2707.846
##   Loglikelihood unrestricted model (H1)      -2674.607
##                                                       
##   Akaike (AIC)                                5437.693
##   Bayesian (BIC)                              5489.278
##   Sample-size adjusted Bayesian (SABIC)       5454.347
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.139
##   90 Percent confidence interval - lower         0.111
##   90 Percent confidence interval - upper         0.170
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.072
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   W2Q29D ~                                                              
##     W1EVERYDAY        0.066    0.026    2.486    0.013    0.066    0.091
##     W1SOCSUPPORT      0.016    0.013    1.233    0.218    0.016    0.043
##     W1CONNECTEDNES    0.029    0.030    0.969    0.333    0.029    0.034
##     W1INTERNALIZED    0.030    0.023    1.300    0.193    0.030    0.046
##     W1FELTSTIGMA      0.034    0.018    1.885    0.059    0.034    0.069
##   W1INTERNALIZED ~                                                      
##     W1FELTSTIGMA      0.121    0.027    4.496    0.000    0.121    0.161
##     W1EVERYDAY        0.127    0.040    3.188    0.001    0.127    0.114
##   W1SOCSUPPORT ~                                                        
##     W1CONNECTEDNES    0.329    0.083    3.976    0.000    0.329    0.139
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .W2Q29D            0.211    0.011   20.050    0.000    0.211    0.975
##    .W1INTERNALIZED    0.491    0.024   20.050    0.000    0.491    0.951
##    .W1SOCSUPPORT      1.635    0.082   20.050    0.000    1.635    0.981
# Path diagram of standardized estimates for model 1 (residual variances
# suppressed). NOTE(review): nodeLabels are assigned positionally -- verify
# the label order matches semPaths' node ordering for this model.
semPaths(fit1, "std", layout = "tree", edge.width = 1.5, edge.label.cex = 1.5, node.width=2, residuals=FALSE, nodeLabels = c("Coming\nOut", "Internalized\nHomophobia", "Social\nSupport", "Everyday\nDiscrimination", "Community\nConnectedness", "Felt\nStigma"), label.cex =1.2)

# lavaan model syntax for path model 2 (Wave 2 predictors -> Wave 3 outcome),
# same structure as path model 1:
#   - W3Q29D regressed on all five Wave 2 predictors
#   - internalized homophobia regressed on felt stigma and everyday discrimination
#   - social support regressed on community connectedness
path2 <-'W3Q29D ~ W2EVERYDAY  + W2SOCSUPPORT  + W2CONNECTEDNESS  + W2INTERNALIZED  + W2FELTSTIGMA

W2INTERNALIZED ~ W2FELTSTIGMA + W2EVERYDAY

W2SOCSUPPORT ~ W2CONNECTEDNESS

'

# Recode the Wave 3 coming-out outcome to a numeric 0/1 indicator, mirroring
# the W2Q29D recode. Guarded with is.numeric(): the original recode was not
# idempotent -- once W3Q29D is numeric, re-running the comparison against
# "(1) Yes" would turn every value into 0 or NA.
if (!is.numeric(da37166.0007$W3Q29D)) {
  da37166.0007$W3Q29D <- as.numeric(da37166.0007$W3Q29D == "(1) Yes")
}

# Sanity check of the recoded 0/1 outcome distribution (describe() --
# presumably psych::describe; the loading chunk is not visible here).
describe(da37166.0007$W3Q29D)
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 692 0.27 0.44      0    0.21   0   0   1     1 1.05     -0.9 0.02
# Fit path model 2 with lavaan (default ML estimator, listwise deletion).
# NOTE(review): W3Q29D is a 0/1 variable but is modeled as continuous here;
# confirm whether a categorical estimator (e.g. ordered = "W3Q29D") was
# intended.
fit2<-sem(path2, data = da37166.0007)

summary(fit2, fit.measures=TRUE, standardized=T)
## lavaan 0.6.15 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
## 
##                                                   Used       Total
##   Number of observations                           565        1518
## 
## Model Test User Model:
##                                                       
##   Test statistic                                78.691
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               148.034
##   Degrees of freedom                                12
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.451
##   Tucker-Lewis Index (TLI)                      -0.647
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -1815.050
##   Loglikelihood unrestricted model (H1)      -1775.705
##                                                       
##   Akaike (AIC)                                3652.100
##   Bayesian (BIC)                              3699.806
##   Sample-size adjusted Bayesian (SABIC)       3664.886
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.182
##   90 Percent confidence interval - lower         0.148
##   90 Percent confidence interval - upper         0.218
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.090
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   W3Q29D ~                                                              
##     W2EVERYDAY        0.089    0.029    3.055    0.002    0.089    0.131
##     W2SOCSUPPORT     -0.020    0.015   -1.308    0.191   -0.020   -0.056
##     W2CONNECTEDNES    0.023    0.034    0.664    0.507    0.023    0.029
##     W2INTERNALIZED    0.024    0.027    0.885    0.376    0.024    0.037
##     W2FELTSTIGMA      0.024    0.021    1.184    0.236    0.024    0.051
##   W2INTERNALIZED ~                                                      
##     W2FELTSTIGMA      0.099    0.032    3.154    0.002    0.099    0.134
##     W2EVERYDAY        0.115    0.045    2.541    0.011    0.115    0.108
##   W2SOCSUPPORT ~                                                        
##     W2CONNECTEDNES    0.494    0.090    5.472    0.000    0.494    0.224
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .W3Q29D            0.188    0.011   16.808    0.000    0.188    0.971
##    .W2INTERNALIZED    0.461    0.027   16.808    0.000    0.461    0.964
##    .W2SOCSUPPORT      1.424    0.085   16.808    0.000    1.424    0.950
# Path diagram of standardized estimates for model 2 (residual variances
# suppressed). NOTE(review): nodeLabels are assigned positionally -- verify
# the label order matches semPaths' node ordering for this model.
semPaths(fit2, "std", layout = "tree", edge.width = 1.5, edge.label.cex = 1.5, node.width=2, residuals=FALSE, nodeLabels = c("Coming\nOut", "Internalized\nHomophobia", "Social\nSupport", "Everyday\nDiscrimination", "Community\nConnectedness", "Felt\nStigma"), label.cex =1.2)

# lavaan model syntax for path model 3 (cross-sectional: Wave 3 predictors ->
# Wave 3 outcome), same structure as the previous two models:
#   - W3Q29D regressed on all five Wave 3 predictors
#   - internalized homophobia regressed on felt stigma and everyday discrimination
#   - social support regressed on community connectedness
path3 <-'W3Q29D ~ W3EVERYDAY  + W3SOCSUPPORT  + W3CONNECTEDNESS  + W3INTERNALIZED  + W3FELTSTIGMA

W3INTERNALIZED ~ W3FELTSTIGMA + W3EVERYDAY

W3SOCSUPPORT ~ W3CONNECTEDNESS

'


# Repeat of the earlier distribution check on the recoded outcome (the recode
# itself only runs once, before fit2).
describe(da37166.0007$W3Q29D)
##    vars   n mean   sd median trimmed mad min max range skew kurtosis   se
## X1    1 692 0.27 0.44      0    0.21   0   0   1     1 1.05     -0.9 0.02
# Fit path model 3 with lavaan (default ML estimator, listwise deletion).
# NOTE(review): as with the other models, the binary W3Q29D is treated as
# continuous; confirm whether a categorical estimator was intended.
fit3<-sem(path3, data = da37166.0007)

summary(fit3, fit.measures=TRUE, standardized=T)
## lavaan 0.6.15 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        11
## 
##                                                   Used       Total
##   Number of observations                           646        1518
## 
## Model Test User Model:
##                                                       
##   Test statistic                                80.150
##   Degrees of freedom                                 4
##   P-value (Chi-square)                           0.000
## 
## Model Test Baseline Model:
## 
##   Test statistic                               164.782
##   Degrees of freedom                                12
##   P-value                                        0.000
## 
## User Model versus Baseline Model:
## 
##   Comparative Fit Index (CFI)                    0.502
##   Tucker-Lewis Index (TLI)                      -0.495
## 
## Loglikelihood and Information Criteria:
## 
##   Loglikelihood user model (H0)              -2074.035
##   Loglikelihood unrestricted model (H1)      -2033.959
##                                                       
##   Akaike (AIC)                                4170.069
##   Bayesian (BIC)                              4219.248
##   Sample-size adjusted Bayesian (SABIC)       4184.324
## 
## Root Mean Square Error of Approximation:
## 
##   RMSEA                                          0.172
##   90 Percent confidence interval - lower         0.140
##   90 Percent confidence interval - upper         0.205
##   P-value H_0: RMSEA <= 0.050                    0.000
##   P-value H_0: RMSEA >= 0.080                    1.000
## 
## Standardized Root Mean Square Residual:
## 
##   SRMR                                           0.085
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   W3Q29D ~                                                              
##     W3EVERYDAY        0.081    0.028    2.915    0.004    0.081    0.117
##     W3SOCSUPPORT      0.002    0.015    0.109    0.913    0.002    0.004
##     W3CONNECTEDNES    0.042    0.031    1.356    0.175    0.042    0.054
##     W3INTERNALIZED    0.053    0.025    2.095    0.036    0.053    0.084
##     W3FELTSTIGMA      0.018    0.019    0.959    0.338    0.018    0.039
##   W3INTERNALIZED ~                                                      
##     W3FELTSTIGMA      0.142    0.028    5.014    0.000    0.142    0.197
##     W3EVERYDAY        0.118    0.043    2.742    0.006    0.118    0.108
##   W3SOCSUPPORT ~                                                        
##     W3CONNECTEDNES    0.411    0.083    4.984    0.000    0.411    0.192
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .W3Q29D            0.191    0.011   17.972    0.000    0.191    0.968
##    .W3INTERNALIZED    0.463    0.026   17.972    0.000    0.463    0.939
##    .W3SOCSUPPORT      1.396    0.078   17.972    0.000    1.396    0.963
# Path diagram of standardized estimates for model 3 (residual variances
# suppressed). NOTE(review): nodeLabels are assigned positionally -- verify
# the label order matches semPaths' node ordering for this model.
semPaths(fit3, "std", layout = "tree", edge.width = 1.5, edge.label.cex = 1.5, node.width=2, residuals=FALSE, nodeLabels = c("Coming\nOut", "Internalized\nHomophobia", "Social\nSupport", "Everyday\nDiscrimination", "Community\nConnectedness", "Felt\nStigma"), label.cex =1.2)

This is my first time building a path model. Obviously the model fit is pretty bad… And the diagram created by semPaths does not look pretty :( — the numbers overlap. I guess semPlot just isn't the best package for visualizing path-analysis results. I'm happy with the other plots I made, but the package used for these SEM diagrams doesn't really allow me to express my creativity.